# PCA ----
# Project the data onto its first two principal components and
# scatter-plot them, coloured by the externally supplied `labels`.
pc <- prcomp(NppDataM)
PC.Npp <- data.frame(pc$x[, 1:2], labels)
names(PC.Npp) <- c("PC1", "PC2", "labels")
plot2 <- ggplot(PC.Npp, aes(PC1, PC2, col = labels)) +
  geom_point() +
  labs(title = "NPP")
# (legend can be hidden with: + theme(legend.position = "none"))
plot2
# An autoencoder reconstructs its own input, so features and targets
# are the same matrix; there is no separate hold-out here.
# NOTE: this assignment was commented out, which left x_train/x_test
# undefined for ncol(x_train) below — it must run.
x_train <- x_test <- NppDataM
library(keras)
K <- keras::backend()
## Deep learning model dimensions
input_size <- ncol(x_train)  # number of input features (columns of NppDataM)
hidden_size <- 10            # 10-dimensional hidden layer
code_size <- 2               # 2-dimensional encoding (bottleneck)
# Symmetric autoencoder: input -> hidden (leaky ReLU + dropout)
# -> 2-d code (leaky ReLU) -> hidden (leaky ReLU) -> sigmoid output.
input <- layer_input(shape = c(input_size))

hidden_1 <- input %>%
  layer_dense(units = hidden_size) %>%
  layer_activation_leaky_relu() %>%
  layer_dropout(rate = 0.1)

code <- hidden_1 %>%
  layer_dense(units = code_size) %>%
  layer_activation_leaky_relu()

hidden_2 <- code %>%
  layer_dense(units = hidden_size) %>%
  layer_activation_leaky_relu()

output <- hidden_2 %>%
  layer_dense(units = input_size, activation = "sigmoid")

# Full model: output should reconstruct the input.
autoencoder <- keras_model(input, output)
# Encoder: maps the input into the 2-d code space.
encoder <- keras_model(input, code)
## Learn
# BUG FIX: the loss was 'categorical_crossentropy', which expects each
# target row to be a probability distribution (one-hot / summing to 1).
# Reconstruction targets are the inputs themselves, so with a sigmoid
# output layer the standard per-feature loss is binary crossentropy
# (assumes inputs are scaled to [0, 1]; use 'mse' otherwise).
autoencoder %>% compile(
  optimizer = "adam",
  loss = "binary_crossentropy",
  metrics = "mae"
)
# Train the autoencoder to reproduce its own input.
autoencoder %>% fit(
  x_train, x_train,
  shuffle = TRUE,
  epochs = 50,
  batch_size = 100,
  validation_data = list(x_test, x_test)
)
## Predict the 2-d code-space coordinates for the test data
## using the trained encoder sub-model.
x_test_encoded <- encoder %>%
  predict(x_test, batch_size = 100)